#install.packages("ggplot2")
library(ggplot2)
# Load the housing data and record its class, column names and dimensions.
txhousing <- read.csv("housing.csv")
# View() opens a GUI data viewer, which is only meaningful in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(txhousing)
}
cl <- class(txhousing)
nam <- names(txhousing)
dimen <- dim(txhousing)
# c() coerces everything to character, so the dimensions are stored as strings.
Q1 <- c(cl, nam, dimen)
Q1
[1] "tbl_df" "tbl" "data.frame" "city" "year"
[6] "month" "sales" "volume" "median" "listings"
[11] "inventory" "date" "8602" "9"
# Structure listing of the columns held in txhousing.
Q2 <- ls.str(txhousing)
# Summary statistics of the median sale price column.
Q3 <- summary(txhousing[["median"]])
# Number of rows recorded for each year.
Q4 <- table(txhousing[["year"]])
print(Q4)
2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
552 552 552 552 552 552 552 552 552 552 552 552 552 552 552 322
# Distinct city names, followed by the number of rows for each city.
unique(txhousing[["city"]])
table(txhousing[["city"]])
# Number of distinct cities in the data set.
Q5 <- length(unique(txhousing[["city"]]))
# Horizontal box plot of the sales column over all rows.
boxplot(
  txhousing$sales,
  main = "Title",
  xlab = "stuff",
  horizontal = TRUE
)

NA
NA
# Two side-by-side boxes: one for the raw year column and one for the raw
# sales column (this is not sales grouped by year).
boxplot(
  txhousing$year,
  txhousing$sales
)

## Horizontal comparative box plot of sales by year.
# With horizontal = TRUE the sales values run along the x-axis and the years
# along the y-axis, so the axis labels are assigned accordingly.
boxplot(txhousing$sales ~ txhousing$year,
        horizontal = TRUE,
        xlab = "Sales",
        ylab = "Year",
        frame = FALSE)

NA
NA
#q8
# Horizontal box plots of sales, one box per year, drawn without a plot frame.
boxplot(
  sales ~ year,
  data = txhousing,
  main = "Sales by Year in Texas Housing",
  xlab = "Sales",
  ylab = "Year",
  las = 1,
  horizontal = TRUE,
  frame = FALSE
)

# Horizontal box plots of median sale price, one box per year.
boxplot(txhousing$median ~ txhousing$year,
        horizontal = TRUE,
        xlab = "Median Sale Price",
        ylab = "Year",
        main = "Housing Prices",
        las = 1,
        # The fill colour argument of boxplot() is `col`; `color` is not a
        # recognised argument and leaves the boxes unfilled.
        col = "lightgreen",
        frame = FALSE)

# Vertical box plots of sales, one box per year.
# With horizontal = FALSE the years are on the x-axis and the sales values on
# the y-axis, so the labels are set to match.
boxplot(txhousing$sales ~ txhousing$year,
        horizontal = FALSE,
        las = 1,
        xlab = "Year",
        ylab = "Sales",
        main = "Sales by Year in Texas Housing",
        frame = FALSE)   # draw without the surrounding plot frame

# Horizontal, light-green box plots of median sale price, one box per year.
boxplot(
  median ~ year,
  data = txhousing,
  main = "Median Sales Price by Year",
  xlab = "Median Sales Price",
  ylab = "Year",
  col = "lightgreen",   # fill colour of the boxes
  las = 1,              # keep axis tick labels horizontal
  horizontal = TRUE,
  frame = FALSE         # no box around the plotting region
)

#Q9a
# Scatter plot of median price against sales, then the least-squares line of
# median on sales drawn on top in red.
plot(
  x = txhousing$sales,
  y = txhousing$median,
  cex = .75,
  las = 1,
  pch = 16
)
abline(
  reg = lm(median ~ sales, data = txhousing),
  lwd = 4,
  col = "red"
)

# Labelled scatter plot of median sales price against sales.
plot(
  median ~ sales,
  data = txhousing,
  main = "Median Price vs. Sales",
  xlab = "Sales",
  ylab = "Median Sales Price",
  cex = 0.75,   # shrink the point symbols
  pch = 16      # filled circles
)

# Number of rows whose median price is missing.
Q10 <- with(txhousing, sum(is.na(median)))
#sum(proportions(is.na(txhousing$median)))
# Proportion of rows whose median price is missing.
Q11 <- with(txhousing, mean(is.na(median)))
#q12
# Row positions at which the median price is missing, and how many there are.
NA_meidan_index <- which(is.na(txhousing[["median"]]))
length(NA_meidan_index)
[1] 616
#q13
# Show city, year, sales and median for every row with a missing median price.
print(
  txhousing[NA_meidan_index, c("city", "year", "sales", "median")]
)
NA
#q14
# Keep only the rows with a known median price, restricted to the columns
# used for modelling. A logical mask is used instead of negative positions
# because x[-integer(0), ] selects zero rows, which would silently empty the
# data set if no median were missing.
txhousing_clean <- txhousing[
  !is.na(txhousing$median),
  c("city", "year", "sales", "median", "month")
]
##correlation
# Correlation of median price with listings, using complete pairs only.
with(txhousing, cor(median, listings, use = "complete.obs"))
[1] 0.2451009
# Correlation of median price with sales, using complete pairs only.
with(txhousing, cor(median, sales, use = "complete.obs"))
[1] 0.3449698
# Correlation of median price with inventory, using complete pairs only.
with(txhousing, cor(median, inventory, use = "complete.obs"))
[1] -0.1421893
# Correlation of sales with listings, using complete pairs only.
with(txhousing, cor(sales, listings, use = "complete.obs"))
[1] 0.9214641
##correlation matrix
# Pairwise correlations among the selected columns, restricted to rows that
# are complete in all of them.
print(
  cor(
    txhousing[, c(
      "sales", "year", "inventory",
      "listings", "month", "median"
    )],
    use = "complete.obs"
  )
)
sales year inventory listings month median
sales 1.00000000 -0.01619670 -0.19290223 0.921391249 0.017088462 0.33507418
year -0.01619670 1.00000000 0.09757065 -0.054056894 -0.044882696 0.48289997
inventory -0.19290223 0.09757065 1.00000000 -0.100199728 0.014515758 -0.14218928
listings 0.92139125 -0.05405689 -0.10019973 1.000000000 0.005974225 0.24560270
month 0.01708846 -0.04488270 0.01451576 0.005974225 1.000000000 0.03699859
median 0.33507418 0.48289997 -0.14218928 0.245602698 0.036998586 1.00000000
#lm(sales ~ median, data = txhousing)
# Linear model predicting median sales price from sales, fitted on the clean
# data set and stored as mod1.
mod1 <- lm(
  formula = median ~ sales,
  data = txhousing_clean
)
print(mod1)
Call:
lm(formula = median ~ sales, data = txhousing_clean)
Coefficients:
(Intercept) sales
121741.82 11.57
##q16
# First row of the clean data: the observation to predict for.
txhousing_clean[1, c("sales", "median")]
# Manual prediction for that row. The intercept, slope and sales value are read
# from mod1 and the data rather than hand-copied, so the result is not affected
# by rounding and stays correct if the model or data change.
Q16 <- coef(mod1)[["(Intercept)"]] +
  coef(mod1)[["sales"]] * txhousing_clean$sales[1]
# Cross-check: predict() on the same row.
predict(mod1, txhousing_clean[1, ])
1
122575.2
# Summary of mod1 (coefficients, residual standard error, R-squared).
Q17 <- summary(mod1)
# Print the stored summary instead of refitting the identical model a second
# time just to display it.
Q17
#q18
# Correlation between sales and median price in the clean data, rounded to two
# decimal places.
with(
  txhousing_clean,
  round(cor(sales, median, use = "complete.obs"), digits = 2)
)
[1] 0.34
#q19
# Turn off scientific notation in printed output. This has to be a call to
# options(); assigning to a variable named `options` changes no setting.
options(scipen = 999)
# Linear model predicting median sales price from sales and month.
mod2 <- lm(median ~ sales + month, data = txhousing_clean)
summary(mod2)
Call:
lm(formula = median ~ sales + month, data = txhousing_clean)
Residuals:
Min 1Q Median 3Q Max
-70676 -26504 -3740 19360 164791
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 119423.8803 848.1637 140.803 < 0.0000000000000002 ***
sales 11.5564 0.3523 32.803 < 0.0000000000000002 ***
month 363.6054 113.9494 3.191 0.00142 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 35050 on 7982 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.1201, Adjusted R-squared: 0.1199
F-statistic: 544.9 on 2 and 7982 DF, p-value: < 0.00000000000000022
## Adjusted R-squared is quite low (0.1199): these two predictors explain only a small amount of the variability in median sales price. Square footage, location, number of bedrooms, and age of the home would probably give a better prediction.
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiNpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikKbGlicmFyeShnZ3Bsb3QyKQpgYGAKCmBgYHtyfQp0eGhvdXNpbmcgPC0gcmVhZC5jc3YoImhvdXNpbmcuY3N2IikKVmlldyh0eGhvdXNpbmcpCmBgYAoKYGBge3J9CmNsIDwtIGNsYXNzKHR4aG91c2luZykKbmFtIDwtIG5hbWVzKHR4aG91c2luZykKZGltZW4gPC0gZGltKHR4aG91c2luZykKUTEgPC0gYyhjbCwgbmFtLCBkaW1lbikKCmBgYAoKYGBge3J9ClEyIDwtIGxzLnN0cih0eGhvdXNpbmcpCmBgYAoKYGBge3J9ClEzIDwtIHN1bW1hcnkodHhob3VzaW5nJG1lZGlhbikKYGBgCgpgYGB7cn0KUTQgPC0gdGFibGUodHhob3VzaW5nJHllYXIpClE0CmBgYAoKYGBge3J9CnVuaXF1ZSh0eGhvdXNpbmckY2l0eSkKdGFibGUodHhob3VzaW5nJGNpdHkpClE1IDwtbGVuZ3RoKHVuaXF1ZSh0eGhvdXNpbmckY2l0eSkpCmBgYAoKYGBge3J9CmJveHBsb3QodHhob3VzaW5nJHNhbGVzLCAgaG9yaXpvbnRhbCA9IFRSVUUsIHhsYWI9InN0dWZmIiwgbWFpbj0iVGl0bGUiKQoKCmBgYAoKYGBge3J9CmJveHBsb3QodHhob3VzaW5nJHllYXIsIHR4aG91c2luZyRzYWxlcykKYGBgCgpgYGB7cn0KIyNob3Jpem9udGFsIGNvbXAgYm94cGxvdApib3hwbG90KHR4aG91c2luZyRzYWxlc350eGhvdXNpbmckeWVhciwgaG9yaXpvbnRhbCA9IFRSVUUsIHhsYWIgPSAoIlllYXIiKSxmcmFtZT1GQUxTRSwgeWxhYiA9ICgiU2FsZXMiKSkKCgpgYGAKCmBgYHtyfQojcTgKYm94cGxvdCh0eGhvdXNpbmckc2FsZXMgfiB0eGhvdXNpbmckeWVhciwKICAgICAgICBob3Jpem9udGFsID0gVFJVRSwKICAgICAgICBsYXMgPSAxLAogICAgICAgIHhsYWIgPSAiU2FsZXMiLAogICAgICAgIHlsYWIgPSAiWWVhciIsCiAgICAgICAgbWFpbiA9ICJTYWxlcyBieSBZZWFyIGluIFRleGFzIEhvdXNpbmciLAogICAgICAgIGZyYW1lID0gRkFMU0UpICAgIyBvciBidHkgPSAibiIKCgpib3hwbG90KHR4aG91c2luZyRtZWRpYW5+dHhob3VzaW5nJHllYXIsCiAgICAgICAgaG9yaXpvbnRhbCA9IFRSVUUsCiAgICAgICAgeGxhYiA9ICJNZWRpYW4gU2FsZSBQcmljZSIsCiAgICAgICAgeWxhYiA9ICJZZWFyIiwKICAgICAgICBtYWluPSJIb3VzaW5nIFByaWNlcyIsCiAgICAgICAgbGFzPTEsCiAgICAgICAgY29sb3I9ImxpZ2h0Z3JlZW4iLAogICAgICAgIGZyYW1lPUZBTFNFKQpgYGAKCmBgYHtyfQpib3hwbG90KHR4aG91c2luZyRzYWxlcyB+IHR4aG91c2luZyR5ZWFyLAogICAgICAgIGhvcml6b250YWwgPSBGQUxTRSwKICAgICAgICBsYXMgPSAxLAogICAgICAgIHhsYWIgPSAiU2FsZXMiLAogICAgICAgIHlsYWIgPSAiWWVhciIsCiAgICAgICAgbWFpbiA9ICJTYWxlcyBieSBZZWFyIGluIFRleGFzIEhvdXNpbmciLAogICAgICAgIGZyYW1lID0gRkFMU0UpICAgIyBvciBidHkgPSAibiIpCmBgYAoKYGBge3J9CiNxOEEKYm94cGxv
dCh0eGhvdXNpbmckbWVkaWFuIH4gdHhob3VzaW5nJHllYXIsCiAgICAgICAgaG9yaXpvbnRhbCA9IFRSVUUsCiAgICAgICAgbGFzID0gMSwgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyByb3RhdGUgeS1heGlzIGxhYmVscyBob3Jpem9udGFsbHkKICAgICAgICB4bGFiID0gIk1lZGlhbiBTYWxlcyBQcmljZSIsCiAgICAgICAgeWxhYiA9ICJZZWFyIiwKICAgICAgICBtYWluID0gIk1lZGlhbiBTYWxlcyBQcmljZSBieSBZZWFyIiwKICAgICAgICBjb2wgPSAibGlnaHRncmVlbiIsICAgICAgICAgICAgICAgICMgY29sb3IgdGhlIGJveGVzIGxpZ2h0IGdyZWVuCiAgICAgICAgZnJhbWUgPSBGQUxTRSkgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSB0aGUgcGxvdCBmcmFtZQoKYGBgCgpgYGB7cn0KI1E5YQpwbG90KHR4aG91c2luZyRzYWxlcywgdHhob3VzaW5nJG1lZGlhbiwKICAgICBwY2g9MTYsCiAgICAgbGFzPTEsCiAgICAgY2V4PS43NSkKCmFibGluZShsbShtZWRpYW5+c2FsZXMsIGRhdGE9dHhob3VzaW5nKSwKICAgICAgIGNvbD0icmVkIiwKICAgICAgIGx3ZD00KQpgYGAKCmBgYHtyfQpwbG90KG1lZGlhbiB+IHNhbGVzLAogICAgIGRhdGEgPSB0eGhvdXNpbmcsCiAgICAgcGNoID0gMTYsICAgICAgICAgICMgc29saWQgYmxhY2sgZG90cwogICAgIGNleCA9IDAuNzUsICAgICAgICAjIHNsaWdodGx5IHNtYWxsZXIgcG9pbnRzCiAgICAgeGxhYiA9ICJTYWxlcyIsCiAgICAgeWxhYiA9ICJNZWRpYW4gU2FsZXMgUHJpY2UiLAogICAgIG1haW4gPSAiTWVkaWFuIFByaWNlIHZzLiBTYWxlcyIpCgpgYGAKCmBgYHtyfQpRMTAgPC0gc3VtKGlzLm5hKHR4aG91c2luZyRtZWRpYW4pKQpgYGAKCmBgYHtyfQojc3VtKHByb3BvcnRpb25zKGlzLm5hKHR4aG91c2luZyRtZWRpYW4pKSkKUTExIDwtIG1lYW4oaXMubmEodHhob3VzaW5nJG1lZGlhbikpCgpgYGAKCmBgYHtyfQojcTEyCgpOQV9tZWlkYW5faW5kZXggPC0gYyh3aGljaChpcy5uYSh0eGhvdXNpbmckbWVkaWFuKSkpCmxlbmd0aChOQV9tZWlkYW5faW5kZXgpCmBgYAoKYGBge3J9CiNxMTMKdHhob3VzaW5nW05BX21laWRhbl9pbmRleCwgYygiY2l0eSIsICJ5ZWFyIiwgInNhbGVzIiwgIm1lZGlhbiIpXQoKYGBgCgpgYGB7cn0KI3ExNAp0eGhvdXNpbmdfY2xlYW4gPC0gdHhob3VzaW5nWy1OQV9tZWlkYW5faW5kZXgsIGMoImNpdHkiLCAieWVhciIsICJzYWxlcyIsICJtZWRpYW4iLCAibW9udGgiKV0KCmBgYAoKYGBge3J9CiMjY29ycmVsYXRpb24KCmNvcih0eGhvdXNpbmckbWVkaWFuLCB0eGhvdXNpbmckbGlzdGluZ3MsIHVzZT0iY29tcGxldGUub2JzIikKY29yKHR4aG91c2luZyRtZWRpYW4sIHR4aG91c2luZyRzYWxlcywgdXNlPSJjb21wbGV0ZS5vYnMiKQpjb3IodHhob3VzaW5nJG1lZGlhbiwgdHhob3VzaW5nJGludmVudG9yeSwgdXNlPSJjb21wbGV0ZS5vYnMiKQpjb3IodHhob3VzaW5nJHNhbGVzLCB0eGhvdXNpbmckbGlzdGluZ3MsIHVzZT0iY29tcGxldGUu
b2JzIikKYGBgCgpgYGB7cn0KIyNjb3JyZWxhdGlvbiBtYXRyaXgKcHJpbnQoY29yKHR4aG91c2luZ1ssIGMoJ3NhbGVzJywgJ3llYXInLCAnaW52ZW50b3J5JywgJ2xpc3RpbmdzJywgJ21vbnRoJywgJ21lZGlhbicpXSwgdXNlID0gImNvbXBsZXRlLm9icyIpKQpgYGAKCmBgYHtyfQojbG0oc2FsZXMgfiBtZWRpYW4sIGRhdGEgPSB0eGhvdXNpbmcpCm1vZDEgPC0gbG0obWVkaWFuIH4gc2FsZXMsIGRhdGEgPSB0eGhvdXNpbmdfY2xlYW4pCiNDcmVhdGUgYSBsaW5lYXIgbW9kZWwgcHJlZGljdGluZyBtZWRpYW4gc2FsZXMgcHJpY2UgYmFzZWQgb24gc2FsZXMgZm9yIHRoZSBjbGVhbiBkYXRhc2V0LiBTdG9yZSB0aGUgbW9kZWwgYXMgbW9kMS4KbW9kMQpgYGAKCmBgYHtyfQojI3ExNgoKdHhob3VzaW5nX2NsZWFuWzEsIGMoInNhbGVzIiwibWVkaWFuIildCgpRMTYgPC0gMTIxNzQxLjgyICsgMTEuNTcgKiA3MiAjIzcyIGlzIHRoZSBzYWxlcyBudW1iZXI/ICAKcHJlZGljdChtb2QxLCB0eGhvdXNpbmdfY2xlYW5bMSwgXSkKYGBgCgpgYGB7cn0KUTE3IDwtIHN1bW1hcnkobW9kMSkKc3VtbWFyeShsbShtZWRpYW5+c2FsZXMsIGRhdGE9dHhob3VzaW5nX2NsZWFuKSkKYGBgCgpgYGB7cn0KI3ExOAoKcm91bmQoY29yKHR4aG91c2luZ19jbGVhbiRzYWxlcywgdHhob3VzaW5nX2NsZWFuJG1lZGlhbiwgdXNlPSdjb21wbGV0ZS5vYnMnKSwgMikKYGBgCgpgYGB7cn0KI3ExOQpvcHRpb25zID0gOTk5ICMgdHVybiBvZmYgc2NpIG5vdGF0aW9uCm1vZDIgPC0gbG0obWVkaWFufnNhbGVzK21vbnRoLCBkYXRhPXR4aG91c2luZ19jbGVhbikKc3VtbWFyeShtb2QyKQpgYGAKCmBgYHtyfQojI3JzcXVhcmVkIGlzIHF1aXRlIGxvdyAuMTE5OSB0aGVzdCB0d28gcHJlZCBleHBsYWluIG9uIGEgYSBzbWFsbCBhbW91bnQgb2YgdGhlIHZhcmlhYmlsaXR5IGluIG1kZWlhbiBzYWxlcyBwcmljZS4gIHNxdWFyZSBmb290YWdlLCBsb2NhdGlvbiwgbnVtYmVyIG9mIGJlZHJvb21zIGFnZSBvZiBob21lIHByb2JhYmx5IGRvIGEgYmV0dGVyIHByZWRpY3Rpb24uICAKYGBgCg==